# dplyr for data manipulation
# ggplot2 for data visualization
# gapminder for data

# dplyr
# ggplot2

file_url <- "https://storage.googleapis.com/learn_pd_like_tidyverse/gapminder.csv"
gap_minder <- read.csv(file_url, stringsAsFactors = FALSE)

# dplyr
install.packages("dplyr")

# filter()
# select()
# arrange()
# mutate()
# summarise()
# group_by()

# filter() for subsetting rows
library(dplyr)
gap_minder %>%
filter(country == "Taiwan")
## country continent year lifeExp pop gdpPercap
## 1 Taiwan Asia 1952 58.50 8550362 1206.948
## 2 Taiwan Asia 1957 62.40 10164215 1507.861
## 3 Taiwan Asia 1962 65.20 11918938 1822.879
## 4 Taiwan Asia 1967 67.50 13648692 2643.859
## 5 Taiwan Asia 1972 69.39 15226039 4062.524
## 6 Taiwan Asia 1977 70.59 16785196 5596.520
## 7 Taiwan Asia 1982 72.16 18501390 7426.355
## 8 Taiwan Asia 1987 73.40 19757799 11054.562
## 9 Taiwan Asia 1992 74.26 20686918 15215.658
## 10 Taiwan Asia 1997 75.25 21628605 20206.821
## 11 Taiwan Asia 2002 76.99 22454239 23235.423
## 12 Taiwan Asia 2007 78.40 23174294 28718.277

# select() for extracting columns
gap_minder %>%
filter(country == "Taiwan") %>%
select(year, gdpPercap, lifeExp)
## year gdpPercap lifeExp
## 1 1952 1206.948 58.50
## 2 1957 1507.861 62.40
## 3 1962 1822.879 65.20
## 4 1967 2643.859 67.50
## 5 1972 4062.524 69.39
## 6 1977 5596.520 70.59
## 7 1982 7426.355 72.16
## 8 1987 11054.562 73.40
## 9 1992 15215.658 74.26
## 10 1997 20206.821 75.25
## 11 2002 23235.423 76.99
## 12 2007 28718.277 78.40

# arrange() for sorting
gap_minder %>%
filter(continent == "Asia") %>%
filter(year == 2007) %>%
arrange(gdpPercap)
## country continent year lifeExp pop gdpPercap
## 1 Myanmar Asia 2007 62.069 47761980 944.0000
## 2 Afghanistan Asia 2007 43.828 31889923 974.5803
## 3 Nepal Asia 2007 63.785 28901790 1091.3598
## 4 Bangladesh Asia 2007 64.062 150448339 1391.2538
## 5 Korea, Dem. Rep. Asia 2007 67.297 23301725 1593.0655
## 6 Cambodia Asia 2007 59.723 14131858 1713.7787
## 7 Yemen, Rep. Asia 2007 62.698 22211743 2280.7699
## 8 Vietnam Asia 2007 74.249 85262356 2441.5764
## 9 India Asia 2007 64.698 1110396331 2452.2104
## 10 Pakistan Asia 2007 65.483 169270617 2605.9476
## 11 West Bank and Gaza Asia 2007 73.422 4018332 3025.3498
## 12 Mongolia Asia 2007 66.803 2874127 3095.7723
## 13 Philippines Asia 2007 71.688 91077287 3190.4810
## 14 Indonesia Asia 2007 70.650 223547000 3540.6516
## 15 Sri Lanka Asia 2007 72.396 20378239 3970.0954
## 16 Syria Asia 2007 74.143 19314747 4184.5481
## 17 Iraq Asia 2007 59.545 27499638 4471.0619
## 18 Jordan Asia 2007 72.535 6053193 4519.4612
## 19 China Asia 2007 72.961 1318683096 4959.1149
## 20 Thailand Asia 2007 70.616 65068149 7458.3963
## 21 Lebanon Asia 2007 71.993 3921278 10461.0587
## 22 Iran Asia 2007 70.964 69453570 11605.7145
## 23 Malaysia Asia 2007 74.241 24821286 12451.6558
## 24 Saudi Arabia Asia 2007 72.777 27601038 21654.8319
## 25 Oman Asia 2007 75.640 3204897 22316.1929
## 26 Korea, Rep. Asia 2007 78.623 49044790 23348.1397
## 27 Israel Asia 2007 80.745 6426679 25523.2771
## 28 Taiwan Asia 2007 78.400 23174294 28718.2768
## 29 Bahrain Asia 2007 75.635 708573 29796.0483
## 30 Japan Asia 2007 82.603 127467972 31656.0681
## 31 Hong Kong, China Asia 2007 82.208 6980412 39724.9787
## 32 Singapore Asia 2007 79.972 4553009 47143.1796
## 33 Kuwait Asia 2007 77.588 2505559 47306.9898

# mutate() for creating new columns
gap_minder %>%
filter(country == "Taiwan") %>%
mutate(gdp_million = (gdpPercap * pop / 1000000))
## country continent year lifeExp pop gdpPercap gdp_million
## 1 Taiwan Asia 1952 58.50 8550362 1206.948 10319.84
## 2 Taiwan Asia 1957 62.40 10164215 1507.861 15326.23
## 3 Taiwan Asia 1962 65.20 11918938 1822.879 21726.78
## 4 Taiwan Asia 1967 67.50 13648692 2643.859 36085.21
## 5 Taiwan Asia 1972 69.39 15226039 4062.524 61856.15
## 6 Taiwan Asia 1977 70.59 16785196 5596.520 93938.68
## 7 Taiwan Asia 1982 72.16 18501390 7426.355 137397.89
## 8 Taiwan Asia 1987 73.40 19757799 11054.562 218413.81
## 9 Taiwan Asia 1992 74.26 20686918 15215.658 314765.07
## 10 Taiwan Asia 1997 75.25 21628605 20206.821 437045.35
## 11 Taiwan Asia 2002 76.99 22454239 23235.423 521733.75
## 12 Taiwan Asia 2007 78.40 23174294 28718.277 665525.79

# summarise() for… a summary
gap_minder %>%
summarise(median(gdpPercap))
## median(gdpPercap)
## 1 3531.847

# group_by() for a grouped summary
gap_minder %>%
group_by(continent) %>%
summarise(medianGdpPercap = median(gdpPercap))
## # A tibble: 5 x 2
## continent medianGdpPercap
## <chr> <dbl>
## 1 Africa 1192.
## 2 Americas 5466.
## 3 Asia 2647.
## 4 Europe 12082.
## 5 Oceania 17983.

# The grammar of graphics.

# ggplot2
install.packages("ggplot2")

# ggplot(aes(x = , y = , color = , fill = , ...)) for data mapping
# geom_OOO() for different charts
# `+` to add different layers

# geom_point() for exploring correlations
# Making a scatter plot
library(ggplot2)
gap_minder %>%
ggplot(aes(x = gdpPercap, y = lifeExp)) +
geom_point()

# geom_histogram() for exploring distributions
gap_minder %>%
ggplot(aes(x = gdpPercap)) +
geom_histogram(bins = 40)

# geom_bar() for exploring row counts
gap_minder %>%
filter(year == 2007) %>%
ggplot(aes(x = continent)) +
geom_bar()

# geom_bar() for grouped summary
gap_minder %>%
filter(year == 2007) %>%
group_by(continent) %>%
summarise(medianGdpPercap = median(gdpPercap)) %>%
ggplot(aes(x = continent, y = medianGdpPercap)) +
geom_bar(stat = "identity")

# plotly
install.packages("plotly")
library(plotly)
radius <- sqrt((gap_minder$pop)/pi)
p <- gap_minder %>%
plot_ly(
x = ~gdpPercap,
y = ~lifeExp,
size = ~pop,
color = ~continent,
frame = ~year,
text = ~country,
hoverinfo = "text",
type = 'scatter',
mode = 'markers',
sizes = c(min(radius), max(radius))
) %>%
layout(
xaxis = list(
type = "log"
)
)
p